#!/usr/local/bin/Rscript

# Produces histograms of peak size and boxplots of tag pileup.
#
# USAGE
# macs_stats_graphs.R --args macs=path/to/input/macs.combined.concatenated.bed
#
# Note: No space between arg name (macs=) and file name
# 
# INPUT file looks like this:
# ------------------------------------------------------------------------------
# chr	summit_start	summit_end	macs_id	pileup_at_summit	strand	peak_start	peak_end	peak_length	summit_dist_from_start	tags_in_region	log_pvalue	fold_enrich	fdr_percent	file_basename	project_id
# chr1	271	272	new001_peak_1	7.00	+	77	468	391	195	7	253.77	577.14	100	new001	chipseq_test
# chr1	709748	709749	new001_peak_2	3.00	+	709488	709904	416	261	4	124.55	247.34	92.00	new001	chipseq_test
# chr1	711178	711179	new001_peak_3	2.00	+	710997	711451	454	182	4	122.29	164.90	90.72	new001	chipseq_test
# chr1	742481	742482	new001_peak_4	2.00	+	742186	742797	611	296	4	114.58	164.90	84.15	new001	chipseq_test
# chr1	745350	745351	new001_peak_5	2.00	+	745079	745535	456	272	3	98.80	164.90	78.25	new001	chipseq_test
# ------------------------------------------------------------------------------

WIDTH= 900

cmd_args = commandArgs();
macs_input= cmd_args[grep('^macs=', cmd_args)]
macs_input= sub(pattern= '^macs=', replacement= '', x= macs_input, perl= TRUE)

macs<- read.table(macs_input, sep= '\t', header= TRUE)

samples= unique(macs$file_basename)
no_samples= length(unique(macs$file_basename))

options(warn= -1) ## X11 throws warnings

png('redmine_wiki/macs_peak_length_trim_hist.png', width = WIDTH, height = WIDTH/4 * ceiling(no_samples/4), pointsize = 18)
par(mfrow= c(ceiling(no_samples/4), 4),  mgp= c(2, 0.75, 0), mar= c(2,2,1,1), oma= c(2,2,2,0))  ## Put 4 graphs/row
for(x in samples){
    trim<- 2000
    peak_len= macs[macs$file_basename == x, "peak_length"]
    hist(peak_len[peak_len < trim]/100, main= '', xlab= '', ylab= '', breaks= 20, xlim= c(0, trim/100))
    legend('topright', legend= paste(x, '\nTot.: ', length(peak_len), '\n<2 kb: ', length(peak_len[peak_len < trim]), sep= ''), bty= 'n', cex= 1, inset= c(0, -0.1), xjust= 1)
    }    
mtext(side= 3, outer= TRUE, text= 'Histograms of peak length (trimmed < 2kb)', line= 0.25)
mtext(side= 2, outer= TRUE, text= 'Frequency', cex= 0.85, line= 0.25)
mtext(side= 1, outer= TRUE, text= 'Peak length (x100 bp)', cex= 0.85, line= 0.5)
dev.off()

png('redmine_wiki/macs_peak_summit_pileup_boxplot.png', width = 0.8 * WIDTH, height = 0.8 * WIDTH * (no_samples/8), pointsize = 15)
par(mfrow= c(1, 2),  mgp= c(2, 0.75, 0), mar= c(4,1,0,1), oma= c(0,10,2,0), las= 1)  ## Put 4 graphs/row
boxplot(macs$pileup_at_summit ~ macs$file_basename, main= '', xlab= 'Tag count', horizontal= TRUE, col= 'lightblue', boxwex= 0.5, outline= FALSE)
boxplot(macs$pileup_at_summit ~ macs$file_basename, main= '',  xlab= 'Tag count (log scale)', horizontal= TRUE, names= FALSE, log= 'x', col= 'lightblue', , boxwex= 0.5)
mtext(text= 'Tag count at peak summits', side= 3, line= 0.75, outer= TRUE)
dev.off()
#png()